## R Script Output ------------------------------------------------------------
# Appendix Table D.1: Descriptive Statistics for DiD Analysis


## Instructions ----------------------------------------------------------------
# Step 1: Adjust MAIN_DIR to where README.txt is located
# Step 2: Run entire script


## IMPORTANT NOTE --------------------------------------------------------------
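# Appendix Table D.1 uses Orbis' proprietary data on firm size and public 
# listing status. To protect Orbis' proprietary data, this script loads saved 
# summary data for these variables instead of the raw values. I created the 
# anonymous summary data using the commented-out code below; running it 
# requires the full dataset (data-merge-91-17.RData), the packages loaded in 
# the setup section, and MAIN_DIR to be set. Statistics for all other 
# variables are computed directly from the dataset loaded by this script.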
#
# # load main dataset
# load(file = paste(MAIN_DIR, "data-merge-91-17.RData", sep = "/"))
# 
# # subset
# table.d1.data <- data.merge %>%
#   select(h1b_deny_rate,
#          size,
#          public) %>% 
#   filter(!is.na(h1b_deny_rate)) %>%
#   mutate(sizeSmall = ifelse(size == "Small", 1, 0),
#          sizeMedium = ifelse(size == "Medium", 1, 0),
#          sizeLarge = ifelse(size == "Large", 1, 0),
#          sizeVery_Large = ifelse(size == "Very Large", 1, 0)) %>%
#   select(-size, -h1b_deny_rate) %>%
#   data.frame()
# 
# # summarize
# table.d1.data.n <- table.d1.data %>%
#   summarize_all(~ sum(!is.na(.x))) %>%
#   t()
# 
# table.d1.data.mean <- table.d1.data %>%
#   summarize_all(mean, na.rm = TRUE) %>%
#   t()
# 
# table.d1.data.median <- table.d1.data %>%
#   summarize_all(median, na.rm = TRUE) %>%
#   t()
# 
# table.d1.data.sd <- table.d1.data %>%
#   summarize_all(sd, na.rm = TRUE) %>%
#   t()
# 
# table.d1.data.min <- table.d1.data %>%
#   summarize_all(min, na.rm = TRUE) %>%
#   t()
# 
# table.d1.data.max <- table.d1.data %>%
#   summarize_all(max, na.rm = TRUE) %>%
#   t()
# 
# # combine
# table.d1.data.anonymous <- cbind(table.d1.data.n,
#                                  table.d1.data.mean,
#                                  table.d1.data.median,
#                                  table.d1.data.sd,
#                                  table.d1.data.min,
#                                  table.d1.data.max)
# 
# # set column names
# table.d1.data.anonymous <- data.frame(table.d1.data.anonymous)
# colnames(table.d1.data.anonymous) <- c("N", "Mean", "Median", "St. Dev.", "Min", "Max")
# rownames(table.d1.data.anonymous) <- c("Public Firm", 
#                                        "Size: Small",
#                                        "Size: Medium",
#                                        "Size: Large",
#                                        "Size: Very Large")
# 
# # round
# table.d1.data.anonymous <- table.d1.data.anonymous %>%
#   mutate(N = round(N, digits = 0))
# 
# # save
# save(table.d1.data.anonymous, 
#      file = paste(MAIN_DIR, "Appendix-Table-D1-data-anonymous.RData", sep = "/"))


## setup -----------------------------------------------------------------------
# clean slate: remove every object from the current workspace
rm(list = ls())
date()

# load packages
pkg <- c("tidyverse",
         "stargazer")

# Attach with library() rather than require(): library() stops with an error
# when a package is not installed, whereas require() only returns FALSE and the
# script would then fail later with a less informative error.
# invisible() suppresses printing of lapply()'s return value.
invisible(lapply(pkg, library, character.only = TRUE))

# set main directory (Step 1 of the instructions: point this at the folder that
# contains README.txt)
MAIN_DIR <- "~/Dropbox/Research/JOP-h1b-replication"


## load data -------------------------------------------------------------------
# 1) dataset with the Orbis variables excluded
# 2) saved anonymous summary statistics for the Orbis variables
load(file.path(MAIN_DIR, "data-merge-91-17-orbis-excluded.RData"))
load(file.path(MAIN_DIR, "Appendix-Table-D1-data-anonymous.RData"))


## create variable name concordance for tables ---------------------------------
# Two-column lookup table used by replaceVarName():
#   var      - variable / term name (interaction terms are written "a:b")
#   var_name - the label printed in tables (LaTeX-style quoting and escapes)
# The pairs are written as "term" = "label" so each term sits next to its label;
# local() keeps the helper objects out of the workspace.
var.df <- local({
  fixed.labels <- c(
    "h1b_deny_rate" = "H-1B Denial Rate",

    # lobbying variables
    "lob_img_2016" = "2016 IMM Lobbying (any)",
    "lob_img_2017" = "2017 IMM Lobbying (any)",
    "lob_text_hs_h1b_visa_2017" =
      "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'')",
    "lob_text_hs_h1b_visa_uscis_2017" =
      "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'' \\& targets USCIS)",
    "lob_text_hs_h1b_visa_dol_2017" =
      "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'' \\& targets DOL)",
    "lob_text_hs_h1b_visa_dhs_2017" =
      "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'' \\& targets DHS)",
    "lob_text_hs_h1b_visa_wh_eop_2017" =
      "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'' \\& targets WH/EOP)",
    "lob_text_hs_h1b_visa_only_congress_2017" =
      "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'' \\& targets Only Congress)",
    "lob_only_tob_2017" = "2017 Tobacco Lobbying Only",
    "lob_only_bev_2017" = "2017 Beverage Lobbying Only",
    "lob_only_cdt_2017" = "2017 Commodities Lobbying Only",

    # lobbying variables interacted with post_2017
    "lob_img_2017:post_2017" =
      "2017 IMM Lobbying (any) x Trump Administration (2017)",
    "lob_text_hs_h1b_visa_2017:post_2017" =
      "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'') x Trump Administration (2017)",
    "lob_text_hs_h1b_visa_uscis_2017:post_2017" =
      "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'' \\& targets USCIS) x Trump Administration (2017)",
    "lob_text_hs_h1b_visa_dol_2017:post_2017" =
      "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'' \\& targets DOL) x Trump Administration (2017)",
    "lob_text_hs_h1b_visa_dhs_2017:post_2017" =
      "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'' \\& targets DHS) x Trump Administration (2017)",
    "lob_text_hs_h1b_visa_wh_eop_2017:post_2017" =
      "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'' \\& targets WH/EOP) x Trump Administration (2017)",
    "lob_text_hs_h1b_visa_only_congress_2017:post_2017" =
      "2017 IMM Lobbying (``Skilled''/``H-1B''/``Visa'' \\& targets Only Congress) x Trump Administration (2017)",

    "lob_only_tob_2017:post_2017" =
      "Tobacco Lobbying Only in 2017 x Trump Administration (2017)",
    "lob_only_bev_2017:post_2017" =
      "Beverage Lobbying Only in 2017 x Trump Administration (2017)",
    "lob_only_cdt_2017:post_2017" =
      "Commodities Lobbying Only in 2017 x Trump Administration (2017)",

    # firm size and listing status; both spellings of "Very Large" map to the
    # same label
    "sizeSmall" = "Size: Small",
    "sizeMedium" = "Size: Medium",
    "sizeLarge" = "Size: Large",
    "sizeVery Large" = "Size: Very Large",
    "sizeVery_Large" = "Size: Very Large",
    "public" = "Public Firm",

    # alternative timing / alternative lobbying-year interactions
    "lob_img_2017:post_2004" =
      "2017 IMM Lobbying (any) x Placebo Timing (2004)",
    "post_2017:lob_img_2016" =
      "2016 IMM Lobbying (any) x Trump Administration (2017)"
  )

  # one entry per year, 1992 through 2015:
  # "lob_img_2017:year_<yyyy>" -> "... x Placebo Timing (<yyyy>)"
  placebo.years <- 1992:2015
  year.labels <- setNames(
    paste0("2017 IMM Lobbying (any) x Placebo Timing (", placebo.years, ")"),
    paste0("lob_img_2017:year_", placebo.years)
  )

  all.labels <- c(fixed.labels, year.labels)

  tibble(var = names(all.labels),
         var_name = unname(all.labels))
})


## function to replace variable names ------------------------------------------
# Translate variable names into table labels using a concordance.
#
# Arguments:
#   var.vec - character vector of variable names to look up
#   var.df  - concordance with a column `var` (names) and a column `var_name`
#             (labels)
#
# Returns the labels in the same order and length as `var.vec`; entries without
# a match are NA and a warning lists the unmatched names.
# Stops with an error if `var.df` lacks either required column. Without this
# check a missing `var_name` column can yield NULL with no warning, which would
# silently drop the labels downstream.
replaceVarName <- function(var.vec, var.df) {
  missing.cols <- setdiff(c("var", "var_name"), names(var.df))
  if (length(missing.cols) > 0) {
    stop("var.df is missing required column(s): ",
         paste(missing.cols, collapse = ", "),
         call. = FALSE)
  }

  # match() returns the position of the first matching row, or NA when there is
  # none; indexing with NA produces an NA label
  out.vec <- var.df$var_name[match(var.vec, var.df$var)]

  unmatched <- is.na(out.vec)
  if (any(unmatched)) {
    warning(paste0("Variable concordence missing: ",
                   paste(var.vec[unmatched], collapse = ", ")))
  }

  out.vec
}


## Appendix Table D.1: Descriptive Statistics for DiD Analysis -----------------
# Non-Orbis statistics
# subset: keep the columns reported in the table and drop rows where
# h1b_deny_rate is missing; data.frame() because the result is passed to
# stargazer()
des.table <- data.merge %>%
  select(
    h1b_deny_rate,
    
    lob_img_2016,
    lob_img_2017,
    lob_text_hs_h1b_visa_2017,
    lob_text_hs_h1b_visa_uscis_2017,
    lob_text_hs_h1b_visa_dol_2017,
    lob_text_hs_h1b_visa_dhs_2017,
    lob_text_hs_h1b_visa_wh_eop_2017,
    lob_text_hs_h1b_visa_only_congress_2017,
    
    lob_only_tob_2017,
    lob_only_bev_2017,
    lob_only_cdt_2017) %>% 
  filter(!is.na(h1b_deny_rate)) %>%
  data.frame()

# extract variable names (translated to table labels via replaceVarName below)
var.label.des <- names(des.table)

# save: divert printed output to the file while stargazer() runs
sink(file.path(MAIN_DIR, "Appendix-Table-D1-non-Orbis.txt"))
#sink(file.path(MAIN_DIR, "Appendix-Table-D1-non-Orbis.tex"))
# tryCatch(finally = ) closes the sink even if stargazer() fails; otherwise all
# later console output would keep going into the file. invisible() keeps the
# call's return value from being printed at top level.
invisible(tryCatch(
  stargazer(des.table, 
            type = "text", 
            #type = "latex", 
            label = "tb:des-stats-did",
            font.size = "small",
            digits = 4,
            title = "{\\bf Descriptive Statistics for DiD Analysis}",
            summary.stat = c("n", "mean", "median", "sd", "min", "max"),
            covariate.labels = replaceVarName(var.vec = var.label.des,
                                              var.df = var.df)),
  finally = sink()
))


# Orbis statistics
# table.d1.data.anonymous already holds the summary statistics, so it is printed
# as-is (summary = FALSE) rather than summarized again
# save: divert printed output to the file while stargazer() runs
sink(file.path(MAIN_DIR, "Appendix-Table-D1-Orbis.txt"))
#sink(file.path(MAIN_DIR, "Appendix-Table-D1-Orbis.tex"))
# tryCatch(finally = ) closes the sink even if stargazer() fails; otherwise all
# later console output would keep going into the file. invisible() keeps the
# call's return value from being printed at top level.
invisible(tryCatch(
  stargazer(table.d1.data.anonymous, 
            type = "text", 
            #type = "latex", 
            label = "tb:des-stats-did",
            font.size = "small",
            digits = 4,
            title = "{\\bf Descriptive Statistics for DiD Analysis}",
            summary = FALSE),
  finally = sink()
))
